from datetime import datetime as dt
from scipy.optimize import curve_fit
from great_tables import GT, loc, style
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
import bokeh.models as bm
import pandas as pd
import xarray as xr
import numpy as np
import os
output_notebook(hide_banner=True)

Plotting Records
Part 3 of 3
This notebook is the last in a series of three notebooks demonstrating how daily and monthly record highs, lows, and averages are calculated from NOAA CO-OPS weather and tide station data. The notebook follows sequentially from NOAA-CO-OPS-records in which we calculated record highs, lows, and averages from observational data for a particular NOAA CO-OPS weather and tide station. Daily and monthly records were written to netCDF files. Here we visualize these records as plots and as a colored dataframe.
In the previous notebook we calculated several records of interest:
- Daily and monthly averages
- Record high daily and monthly averages*
- Record low daily and monthly averages*
- Average daily and monthly high
- Lowest daily and monthly high*
- Record daily and monthly high*
- Average daily and monthly low
- Highest daily and monthly low*
- Record daily and monthly low*
For those records marked with an asterisk (*), we also noted the year in which that particular record was set. Now let’s return to these statistics to visualize them.
Packages and configurations
As always, we first import the packages we need. We will use Bokeh to make interactive plots and great_tables to display the data behind the plots in a visually appealing manner.
To better visualize the seasonality of daily and monthly averages, average highs, and average lows, we will fit a curve to the calculated averages and plot these curves instead of the actual values. This will be done with curve_fit from SciPy.
By default, Python only displays a given warning the first time it is thrown. Ideally, we want code that does not throw any warnings, but it sometimes takes some trial and error to resolve the issue being warned about. So, for diagnostic purposes, we’ll set the kernel to always display warnings.
import warnings
warnings.filterwarnings('always')

Functions
Let’s define functions to plot the daily and monthly data. These two plots will be similar in appearance but have some differences (for example, the x axis), so two separate functions will be needed.
First, we’ll need some helper functions. Some of these were used previously, while others are new:
def camel(text):
    """Convert 'text' to camel case.

    Commas are removed, and hyphens and underscores are treated as word
    separators. The first word is lowercased and every following word is
    capitalized, e.g. 'Virginia Key, FL' -> 'virginiaKeyFl'.

    Inputs:
        text: str to convert

    Returns:
        str in camel case, or an empty string if 'text' contains no words
        (empty, whitespace only, or separators only).
    """
    words = text.replace(',', '').replace('-', ' ').replace('_', ' ').split()
    # Check the split result, not the raw text: input such as ' ' or '-'
    # is non-empty but yields no words, and indexing words[0] would fail.
    if not words:
        return ''
    return words[0].lower() + ''.join(word.capitalize() for word in words[1:])
def round_down(num, divisor):
    """Return the largest multiple of 'divisor' that does not exceed 'num'.

    Example: round_down(45.5, 10) gives 40. A value that is already a
    multiple of 'divisor' is returned unchanged.
    """
    remainder = num % divisor
    return num - remainder
def round_up(num, divisor):
    """Round num up to the nearest divisor.

    For example, round_up(45.5, 10) will return 50. A value that is
    already a multiple of 'divisor' is returned unchanged, e.g.
    round_up(50, 10) returns 50, mirroring round_down.
    """
    # (-num % divisor) is the distance up to the next multiple and is 0
    # when num is already a multiple, so exact values are not bumped by a
    # full extra step.
    return num + (-num % divisor)
def cos_fit(data, plot=False):
    """Fit a cosine curve to 'data' and return the fitted values.

    The samples are placed on an evenly spaced axis x = i / len(data) for
    i = 0 .. len(data) - 1, and the model

        amplitude * cos(freq * x + phase) + offset

    is fitted with scipy.optimize.curve_fit.

    Inputs:
        data: 1-D sequence of numbers (e.g., a pandas Series)
        plot: bool, if True also display the data and the fitted curve
            in a Bokeh figure for visual inspection

    Returns:
        numpy array with the same length as 'data' containing the fitted
        curve evaluated at every sample position.
    """
    n = len(data)
    x = np.arange(0, n) / n

    # Initial parameter values: [frequency, amplitude, phase, offset]
    p0 = [1, 3 * np.std(data) / (2 ** 0.5), 0, np.mean(data)]

    # Function to fit
    def my_cos(x, freq, amplitude, phase, offset):
        return np.cos(x * freq + phase) * amplitude + offset

    # curve_fit returns (optimal parameters, covariance); only the
    # parameters are needed to evaluate the curve
    params, _ = curve_fit(my_cos, x, data, p0=p0)
    fitted = my_cos(x, *params)

    if plot:
        # Diagnostic plot drawn with Bokeh, which this file already
        # imports (matplotlib is not imported anywhere in the file).
        # The fitted curve is plotted, not the raw curve_fit result.
        label = str(getattr(data, 'name', None) or 'data')
        positions = np.arange(n)
        p = figure(width=900, height=400, title='Cosine fit')
        p.line(positions, np.asarray(data, dtype=float), legend_label=label)
        p.line(positions, fitted, color='red', legend_label='Cosine fit')
        show(p)
    return fitted
Defining all of the colors in a dictionary will make it easier to customize everything later and will clean up the plotting code. Below is a dictionary of three color schemes: “mg” are my chosen colors, “bm” colors are the same color scheme as Brian McNoldy’s figures on his website, and “cb” are colorblind-friendly colors.
# Color dictionary
# https://www.tutorialrepublic.com/css-reference/css-color-names.php
#
# Maps color scheme name -> {record/column name -> color}. Colors are
# either hex codes or CSS color names. Every scheme defines the same keys.
colors = dict(
    mg={
        'Date': 'white',
        'Month': 'white',
        'Daily Average': '#F5F5F5',
        'Monthly Average': '#F5F5F5',
        'Record High Daily Average': '#ff8080',
        'Record High Daily Average Year': '#ff8080',
        'Record High Monthly Average': '#ff8080',
        'Record High Monthly Average Year': '#ff8080',
        'Record Low Daily Average': '#c1d5f8',
        'Record Low Daily Average Year': '#c1d5f8',
        'Record Low Monthly Average': '#c1d5f8',
        'Record Low Monthly Average Year': '#c1d5f8',
        'Average High': '#dc8d8d',
        'Lowest High': '#e6aeae',
        'Lowest High Year': '#e6aeae',
        'Record High': '#d26c6c',
        'Record High Year': '#d26c6c',
        'Average Low': '#a2bff4',
        'Highest Low': '#d1dffa',
        'Highest Low Year': '#d1dffa',
        'Record Low': '#74a0ef',
        'Record Low Year': '#74a0ef',
        'Years': 'white',
        'Plot Light Color': '#D3D3D3',
    },
    bm={
        'Date': 'white',
        'Month': 'white',
        'Daily Average': 'gainsboro',
        'Monthly Average': 'gainsboro',
        'Record High Daily Average': 'mistyrose',
        'Record High Daily Average Year': 'mistyrose',
        'Record High Monthly Average': 'mistyrose',
        'Record High Monthly Average Year': 'mistyrose',
        'Record Low Daily Average': 'lavender',
        'Record Low Daily Average Year': 'lavender',
        'Record Low Monthly Average': 'lavender',
        'Record Low Monthly Average Year': 'lavender',
        'Average High': 'orangered',
        'Lowest High': 'darkorange',
        'Lowest High Year': 'darkorange',
        'Record High': 'orange',
        'Record High Year': 'orange',
        'Average Low': 'mediumpurple',
        # 'navy' is the CSS color name; 'navyblue' is not a valid name
        'Highest Low': 'navy',
        'Highest Low Year': 'navy',
        'Record Low': 'lightblue',
        'Record Low Year': 'lightblue',
        'Years': 'white',
        'Plot Light Color': 'white',
    },
    # NOTE(review): the '#' entries below are unfinished placeholders, not
    # valid colors. They must be filled in before the 'cb' scheme can be
    # used to color the table columns that reference them.
    cb={
        'Date': 'white',
        'Month': 'white',
        'Daily Average': '#F5F5F5',
        'Monthly Average': '#F5F5F5',
        'Record High Daily Average': '#',
        'Record High Daily Average Year': '#',
        'Record High Monthly Average': '#',
        'Record High Monthly Average Year': '#',
        'Record Low Daily Average': '#',
        'Record Low Daily Average Year': '#',
        'Record Low Monthly Average': '#',
        'Record Low Monthly Average Year': '#',
        'Average High': '#dc8d8d',
        'Lowest High': '#',
        'Lowest High Year': '#',
        'Record High': '#d26c6c',
        'Record High Year': '#d26c6c',
        'Average Low': '#a2bff4',
        'Highest Low': '#',
        'Highest Low Year': '#',
        'Record Low': '#74a0ef',
        'Record Low Year': '#74a0ef',
        'Years': 'white',
        'Plot Light Color': 'white',
    },
)

# The plots will be made using Bokeh for interactivity. Consequently, there
# are many steps involved in building and formatting the plot with the
# desired functionality. We will plot daily/monthly averages, average highs,
# and average lows as curves; record highs and record lows as points; and
# will highlight records set this year for emphasis. The plot will also
# contain a legend and a hoverbox that displays the values of each series
# for a given date when one hovers the mouse pointer over the plot. The
# functions below will be used to generate daily and monthly climatology
# plots, and comments within the functions explain what each step does.
Note that daily_climo also supports showing flood thresholds when used to plot water level data. These thresholds need to be retrieved for each site and passed as a dictionary, for example:
# Example flood-threshold dictionary (threshold name -> water level)
floods = {'Major Flood Threshold': 2.5,
          'Moderate Flood Threshold': 1.7,
          'Minor Flood Threshold': 1.3}


def daily_climo(data, var, flood_thresholds=None, scheme='mg'):
    """Create a daily climatology plot for environmental variable 'var'
    from 'data' using color scheme 'scheme'.

    Average high, daily average, and average low are drawn as fitted
    cosine curves; record highs and lows are drawn as points; records set
    in the current calendar year are highlighted in white. The figure is
    displayed with Bokeh's show() and nothing is returned.

    Inputs:
        data: xarray containing climatological stats
        var: str, one of the available environmental variables in 'data'
        flood_thresholds: dict mapping a threshold name to its water
            level. Drawn as dashed horizontal lines, only when 'var' is
            'Water Level'. Ignored otherwise.
        scheme: str specifying which color scheme to use. Options: 'mg'
            for M. Grossi's, 'bm' for B. McNoldy's, or 'cb' to use a
            colorblind scheme
    """
    # Dates for x axis. 2020 is a leap year, so this produces 366 dates.
    # NOTE(review): assumes the daily stats have one row per calendar day
    # including Feb 29 -- confirm against the file that wrote them.
    df = data.sel(variable=var).to_dataframe().drop('variable', axis=1)
    df['xdates'] = pd.date_range(start='2020-01-01', end='2020-12-31', freq='1D')
    df['Average High Curve'] = cos_fit(df['Average High']).round(1)
    df['Daily Average Curve'] = cos_fit(df['Daily Average']).round(1)
    df['Average Low Curve'] = cos_fit(df['Average Low']).round(1)

    # Record this year
    thisYear = pd.to_datetime('today').year
    year_columns = ['Record High Year', 'Record Low Year']
    thisYearRecords = (df == thisYear)[year_columns].sum().sum()
    lastYearRecords = (df == thisYear - 1)[year_columns].sum().sum()
    # Keep the record value only where it was set this year (NaN elsewhere)
    df['High Records'] = df['Record High'].where(df['Record High Year'] == thisYear)
    df['Low Records'] = df['Record Low'].where(df['Record Low Year'] == thisYear)
    source = bm.ColumnDataSource(df)

    # Create a new plot
    # Dates are formatted as M/D/YYYY without zero padding. This is built
    # by hand because the '%-m' / '%-d' strftime codes are platform-specific.
    first = dt.strptime(data.attrs[f'{var} data range'][0], '%Y-%m-%d')
    last = dt.strptime(data.attrs[f'{var} data range'][1], '%Y-%m-%d')
    ts_start = f'{first.month}/{first.day}/{first:%Y}'
    ts_end = f'{last.month}/{last.day}/{last:%Y}'
    title = ('Daily {} records | {} - {}\n'.format(var.lower(), ts_start, ts_end).upper()
             + 'As of today, {} {} record highs/lows have been set. '.format(thisYearRecords, var.lower())
             + 'Last year, {} records were set.'.format(lastYearRecords))
    p = figure(title=title,
               background_fill_color='#404040', border_fill_color='#404040',
               width=1000, height=600, x_axis_type='datetime',
               y_range=(round_down(df['Record Low'].min(), 10),
                        round_up(df['Record High'].max(), 10)),
               tools='pan, wheel_zoom, box_zoom, undo, reset, fullscreen',
               outline_line_color=None, sizing_mode='scale_height')

    # This year record highs
    hr = p.scatter(x='xdates', y='High Records', source=source,
                   name=f'{thisYear} High Record', size=6, color='white')
    # This year record lows
    lr = p.scatter(x='xdates', y='Low Records', source=source,
                   name=f'{thisYear} Low Record', size=6, color='white')
    # Record highs
    rh = p.scatter(x='xdates', y='Record High', source=source,
                   name='Record High', size=2,
                   color=colors[scheme]['Record High'])
    # Average high
    ah = p.line(x='xdates', y='Average High Curve', source=source,
                name='Average High', width=3,
                color=colors[scheme]['Average High'])
    # Daily average
    da = p.line(x='xdates', y='Daily Average Curve', source=source,
                name='Daily Average', width=2,
                color=colors[scheme]['Daily Average'])
    # Average lows
    al = p.line(x='xdates', y='Average Low Curve', source=source,
                name='Average Low', width=3,
                color=colors[scheme]['Average Low'])
    # Record lows
    rl = p.scatter(x='xdates', y='Record Low', source=source,
                   name='Record Low', size=2,
                   color=colors[scheme]['Record Low'],
                   hover_fill_color='white', hover_alpha=0.5)

    # Flood thresholds (water level plot only)
    if var == 'Water Level' and flood_thresholds is not None:
        for level, threshold in flood_thresholds.items():
            hline = bm.Span(location=threshold, dimension='width',
                            line_dash=[20, 8], line_alpha=0.75,
                            line_color='cadetblue', line_width=2)
            p.renderers.extend([hline])
            # Label placed just above the line, left of the first date
            mytext = bm.Label(x=pd.to_datetime('2019-12-15'), y=threshold + 0.1,
                              text=level.upper(), text_color='cadetblue',
                              text_font_size='8px',
                              text_font='arial narrow')
            p.add_layout(mytext)

    # Tools
    crosshair = bm.CrosshairTool(dimensions='height',
                                 line_color='grey', line_alpha=0.5)
    # Hover is attached to the daily-average line only so that a single
    # tooltip is shown per date
    hover = bm.HoverTool(mode='vline', renderers=[da],
                         formatters={'@xdates': 'datetime'})
    hover.tooltips = """
        <b> @xdates{{%b %d}} </b> <br>
        Record High: @{{Record High}}{{0.0}} <br>
        Average High: @{{Average High Curve}}{{0.0}} <br>
        Daily Average: @{{Daily Average Curve}}{{0.0}} <br>
        Average Low: @{{Average Low Curve}}{{0.0}} <br>
        Record Low: @{{Record Low}}{{0.0}} <br>
        {} High Record: @{{High Records}}{{0.0}} <br>
        {} Low Record: @{{Low Records}}{{0.0}}
        """.format(thisYear, thisYear)
    p.add_tools(hover, crosshair)
    p.toolbar.autohide = True

    # x-axis
    p.xaxis[0].formatter = bm.DatetimeTickFormatter(months="%b %d")
    p.xaxis[0].ticker.desired_num_ticks = 12
    p.xgrid.grid_line_color = None
    p.xaxis.axis_line_color = 'grey'
    p.xaxis.major_tick_line_color = 'grey'

    # y-axis
    p.yaxis.axis_label = f'{var} ({data.attrs[f"{var} units"]})'
    p.yaxis.axis_label_text_color = colors[scheme]['Plot Light Color']
    p.ygrid.grid_line_color = 'grey'
    p.yaxis.axis_line_color = None
    p.yaxis.major_tick_line_color = None
    p.yaxis.minor_tick_line_color = None

    # Fonts
    p.title.text_font = 'arial narrow'
    p.title.text_font_size = '16px'
    p.title.text_color = colors[scheme]['Plot Light Color']
    p.xaxis.major_label_text_font = 'arial narrow'
    p.xaxis.major_label_text_color = colors[scheme]['Plot Light Color']
    p.xaxis.major_label_text_font_size = "14px"
    p.yaxis.major_label_text_font = 'arial narrow'
    p.yaxis.axis_label_text_font = 'arial narrow'
    p.yaxis.axis_label_text_font_style = 'normal'
    p.yaxis.major_label_text_color = colors[scheme]['Plot Light Color']
    p.yaxis.major_label_text_font_size = "14px"
    p.yaxis.axis_label_text_font_size = "14px"

    # Legend
    legend = bm.Legend(items=[
        ('{} Record'.format(thisYear), [hr, lr]),
        ('Record High', [rh]),
        ('Average High', [ah]),
        ('Daily Average', [da]),
        ('Average Low', [al]),
        ('Record Low', [rl])],
        background_fill_color='#404040', border_line_color=None,
        label_text_color=colors[scheme]['Plot Light Color'],
        location='center_right', click_policy='mute')
    p.add_layout(legend, 'right')

    # Show the results
    show(p)
def monthly_climo(data, var, scheme='mg'):
    """Create a monthly climatology plot for environmental variable 'var'
    from 'data' using color scheme 'scheme'.

    Average high, monthly average, and average low are drawn as fitted
    cosine curves; record highs and lows are drawn as points; records set
    in the current calendar year are highlighted in white. The figure is
    displayed with Bokeh's show() and nothing is returned.

    Inputs:
        data: xarray containing climatological stats
        var: str, one of the available environmental variables in 'data'
        scheme: str specifying which color scheme to use. Options: 'mg'
            for M. Grossi's, 'bm' for B. McNoldy's, or 'cb' to use a
            colorblind scheme
    """
    # Months for x axis: reset_index() turns the index into a regular
    # column so that it can be referenced by name ('Month') in the glyphs
    df = data.sel(variable=var).to_dataframe().drop('variable', axis=1).reset_index()
    df['Average High Curve'] = cos_fit(df['Average High']).round(1)
    df['Monthly Average Curve'] = cos_fit(df['Monthly Average']).round(1)
    df['Average Low Curve'] = cos_fit(df['Average Low']).round(1)

    # Record this year
    thisYear = pd.to_datetime('today').year
    year_columns = ['Record High Year', 'Record Low Year']
    thisYearRecords = (df == thisYear)[year_columns].sum().sum()
    lastYearRecords = (df == thisYear - 1)[year_columns].sum().sum()
    # Keep the record value only where it was set this year (NaN elsewhere)
    df['High Records'] = df['Record High'].where(df['Record High Year'] == thisYear)
    df['Low Records'] = df['Record Low'].where(df['Record Low Year'] == thisYear)
    source = bm.ColumnDataSource(df)

    # Create a new plot
    # Dates are formatted as M/D/YYYY without zero padding. This is built
    # by hand because the '%-m' / '%-d' strftime codes are platform-specific.
    first = dt.strptime(data.attrs[f'{var} data range'][0], '%Y-%m-%d')
    last = dt.strptime(data.attrs[f'{var} data range'][1], '%Y-%m-%d')
    ts_start = f'{first.month}/{first.day}/{first:%Y}'
    ts_end = f'{last.month}/{last.day}/{last:%Y}'
    title = ('Monthly {} records | {} - {}\n'.format(var.lower(), ts_start, ts_end).upper()
             + 'As of today, {} {} record highs/lows have been set. '.format(thisYearRecords, var.lower())
             + 'Last year, {} records were set.'.format(lastYearRecords))
    p = figure(title=title,
               background_fill_color='#404040', border_fill_color='#404040',
               width=1000, height=600,
               x_range=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
               y_range=(round_down(df['Record Low'].min(), 10),
                        round_up(df['Record High'].max(), 10)),
               tools='pan, wheel_zoom, box_zoom, undo, reset, fullscreen',
               outline_line_color=None, sizing_mode='scale_height')

    # This year record highs
    hr = p.scatter(x='Month', y='High Records', source=source,
                   name=f'{thisYear} High Record', size=6, color='white')
    # This year record lows
    lr = p.scatter(x='Month', y='Low Records', source=source,
                   name=f'{thisYear} Low Record', size=6, color='white')
    # Record highs
    rh = p.scatter(x='Month', y='Record High', source=source,
                   name='Record High', size=7,
                   color=colors[scheme]['Record High'])
    # Average high
    ah = p.line(x='Month', y='Average High Curve', source=source,
                name='Average High', width=4,
                color=colors[scheme]['Average High'])
    # Monthly average
    ma = p.line(x='Month', y='Monthly Average Curve', source=source,
                name='Monthly Average', width=3,
                color=colors[scheme]['Monthly Average'])
    # Average lows
    al = p.line(x='Month', y='Average Low Curve', source=source,
                name='Average Low', width=4,
                color=colors[scheme]['Average Low'])
    # Record lows
    rl = p.scatter(x='Month', y='Record Low', source=source,
                   name='Record Low', size=7,
                   color=colors[scheme]['Record Low'],
                   hover_fill_color='white', hover_alpha=0.5)

    # Tools
    crosshair = bm.CrosshairTool(dimensions='height',
                                 line_color='grey', line_alpha=0.5)
    # Hover is attached to the monthly-average line only so that a single
    # tooltip is shown per month. No datetime formatter is needed because
    # the x axis is categorical.
    hover = bm.HoverTool(mode='vline', renderers=[ma])
    # The tooltip must reference 'Monthly Average Curve': this data source
    # has no 'Daily Average Curve' column.
    hover.tooltips = """
        <b> @Month </b> <br>
        Record High: @{{Record High}}{{0.0}} <br>
        Average High: @{{Average High Curve}}{{0.0}} <br>
        Monthly Average: @{{Monthly Average Curve}}{{0.0}} <br>
        Average Low: @{{Average Low Curve}}{{0.0}} <br>
        Record Low: @{{Record Low}}{{0.0}} <br>
        {} High Record: @{{High Records}}{{0.0}} <br>
        {} Low Record: @{{Low Records}}{{0.0}}
        """.format(thisYear, thisYear)
    p.add_tools(hover, crosshair)
    p.toolbar.autohide = True

    # x-axis
    p.xgrid.grid_line_color = None
    p.xaxis.axis_line_color = 'grey'
    p.xaxis.major_tick_line_color = 'grey'

    # y-axis
    p.yaxis.axis_label = f'{var} ({data.attrs[f"{var} units"]})'
    p.yaxis.axis_label_text_color = colors[scheme]['Plot Light Color']
    p.ygrid.grid_line_color = 'grey'
    p.yaxis.axis_line_color = None
    p.yaxis.major_tick_line_color = None
    p.yaxis.minor_tick_line_color = None

    # Fonts
    p.title.text_font = 'arial narrow'
    p.title.text_font_size = '16px'
    p.title.text_color = colors[scheme]['Plot Light Color']
    p.xaxis.major_label_text_font = 'arial narrow'
    p.xaxis.major_label_text_color = colors[scheme]['Plot Light Color']
    p.xaxis.major_label_text_font_size = "14px"
    p.yaxis.major_label_text_font = 'arial narrow'
    p.yaxis.axis_label_text_font = 'arial narrow'
    p.yaxis.axis_label_text_font_style = 'normal'
    p.yaxis.major_label_text_color = colors[scheme]['Plot Light Color']
    p.yaxis.major_label_text_font_size = "14px"
    p.yaxis.axis_label_text_font_size = "14px"

    # Legend
    legend = bm.Legend(items=[
        ('{} Record'.format(thisYear), [hr, lr]),
        ('Record High', [rh]),
        ('Average High', [ah]),
        ('Monthly Average', [ma]),
        ('Average Low', [al]),
        ('Record Low', [rl])],
        background_fill_color='#404040', border_line_color=None,
        label_text_color=colors[scheme]['Plot Light Color'],
        location='center_right', click_policy='mute')
    p.add_layout(legend, 'right')

    # Show the results
    show(p)

# Loading data
Now we need to load the records for the desired station. The station name is used to determine the directory from which to load the data. As before, stationname is the custom human-readable “City, ST” string for the station.
stationname = 'Virginia Key, FL'

Derive the local directory name containing the data from the station name. This is the same way the directory was created when the data were downloaded.
dirname = camel(stationname)
outdir = os.path.join(os.getcwd(), dirname)
print(f"Station folder: {dirname}")
print(f"Full directory: {outdir}")

Station folder: virginiaKeyFl
Full directory: /workspaces/climatology-quarto/virginiaKeyFl
Next, load the data and metadata.
# Records
days = xr.load_dataset(os.path.join(outdir, 'statistics-daily.nc'))
mons = xr.load_dataset(os.path.join(outdir, 'statistics-monthly.nc'))

<frozen importlib._bootstrap>:241: RuntimeWarning: numpy.ndarray size changed, may indicate binary incompatibility. Expected 16 from C header, got 96 from PyObject
And finally, we can make some plots. Let’s look at daily and monthly climatology for Air Temperature.
var = 'Air Temperature'
daily_climo(data=days, var=var, flood_thresholds=floods, scheme='mg')
monthly_climo(data=mons, var=var, scheme='mg')

Data Table
One may wish to see the data behind these plots, or see the other records not plotted. We will use the great_tables library to display colored tables. We’ll demonstrate this below for Air Temperature.
great_tables displays dataframes, so we first need to extract the data from the xarray object, convert it to a Pandas dataframe, and reset the index.
# Extract the monthly records for `var` from the xarray object into a flat
# pandas dataframe with the index reset to a regular column
monthly = mons.sel(variable=var.title()).to_dataframe()
stats = monthly.drop('variable', axis=1).reset_index()

# Create the `great_tables` object and style the body of every column.
# Each column is filled with its color from the 'mg' scheme of the color
# dictionary defined above, with its text centered in the cell.
gtbl = GT(stats)
for column in stats.columns:
    body_style = [style.fill(color=colors['mg'][column]),
                  style.text(align='center', v_align='middle')]
    gtbl = gtbl.tab_style(style=body_style,
                          locations=loc.body(columns=column))

# Now we format the rest of the table: centered columns, a bold light-on-dark
# header row, and the overall font size and horizontal rule color
header_style = [style.text(color='gainsboro', weight='bold'),
                style.fill(color='dimgray')]
gtbl = gtbl.cols_align(align='center')
gtbl = gtbl.tab_style(style=header_style, locations=loc.column_header())
gtbl = gtbl.tab_options(table_font_size='13px',
                        table_body_hlines_color='white')
gtbl.show()

| Month | Monthly Average | Record High Monthly Average | Record High Monthly Average Year | Record Low Monthly Average | Record Low Monthly Average Year | Average High | Lowest High | Lowest High Year | Record High | Record High Year | Average Low | Highest Low | Highest Low Year | Record Low | Record Low Year | Years |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Jan | 68.7 | 72.6 | 2013 | 63.0 | 2001 | 76.0 | 73.0 | 2011 | 78.0 | 2015 | 55.6 | 63.5 | 2013 | 48.3 | 1997 | 23 |
| Feb | 70.8 | 74.9 | 2018 | 65.5 | 1996 | 76.5 | 74.2 | 2000 | 78.6 | 2021 | 59.4 | 70.0 | 2018 | 47.9 | 1996 | 23 |
| Mar | 72.3 | 77.6 | 2003 | 66.1 | 2010 | 78.5 | 74.2 | 2010 | 82.8 | 2003 | 63.3 | 72.0 | 1997 | 55.1 | 1996 | 24 |
| Apr | 75.6 | 79.4 | 2020 | 72.8 | 2004 | 80.8 | 77.3 | 2004 | 85.8 | 2020 | 68.3 | 72.6 | 2015 | 61.2 | 2009 | 24 |
| May | 78.7 | 80.7 | 1995 | 77.0 | 2013 | 82.5 | 80.8 | 2014 | 85.2 | 1995 | 73.8 | 77.1 | 2003 | 67.9 | 1999 | 21 |
| Jun | 81.5 | 83.6 | 2010 | 79.8 | 2014 | 84.8 | 82.8 | 2014 | 87.6 | 2009 | 77.6 | 80.8 | 2004 | 75.1 | 1995 | 20 |
| Jul | 82.9 | 85.0 | 2023 | 81.0 | 2013 | 85.8 | 84.2 | 2012 | 88.7 | 2018 | 79.0 | 82.3 | 2022 | 76.1 | 2013 | 25 |
| Aug | 83.2 | 85.9 | 2022 | 81.8 | 1994 | 85.7 | 84.0 | 2003 | 88.5 | 2022 | 79.3 | 83.6 | 2022 | 76.1 | 1996 | 24 |
| Sep | 82.0 | 82.7 | 2017 | 80.6 | 2001 | 85.1 | 83.9 | 2000 | 86.7 | 2021 | 78.2 | 79.8 | 2009 | 74.3 | 2001 | 24 |
| Oct | 79.6 | 81.2 | 2020 | 77.5 | 2000 | 83.8 | 81.0 | 2010 | 86.8 | 2023 | 72.7 | 77.8 | 1995 | 64.6 | 2005 | 23 |
| Nov | 75.0 | 78.6 | 2015 | 71.4 | 2012 | 79.7 | 76.9 | 2012 | 82.0 | 2020 | 66.0 | 74.4 | 2020 | 57.4 | 2006 | 23 |
| Dec | 71.4 | 76.9 | 2015 | 62.1 | 2010 | 77.5 | 72.5 | 2010 | 79.6 | 1994 | 59.2 | 70.5 | 2015 | 48.8 | 2010 | 24 |
And there we have it! All of the records for each month, color coded for easier reading.
Some concluding remarks on the choice of packages here. Another common Python library for making interactive plots is Plotly. I tried this first (see below) but encountered a known issue with rendering Plotly plots in Quarto web dashboards. In short, the first time Plotly is called in a web application, the plot renders to the proper size of the web container, but subsequent calls to Plotly (like navigating to a new tab or page) do not size figures properly. The workaround demonstrated here fixed the width rendering, but all of the resulting plots were only half the height of the container/page. Plotly also supports displaying colored tables, but these experienced the same rendering issue with Quarto. Cue Bokeh. This library did not have the rendering problem, although the plots had slightly less interactivity than the Plotly versions. Creating colored tables with Bokeh, however, turned out to be frustratingly difficult and very poorly documented. For example, Bokeh tables are colored using HTML, but there was no documentation on how to color an entire column of data. In contrast, the library great_tables made this easy, although it too currently lacks the full interactivity that Plotly offered (e.g., sorting by column).
The following is a Plotly version of the daily climatology plot above. It is basically the same but supports some behaviors that, so far, are not possible (or much harder to accomplish) with Bokeh, such as only showing records in the hoverbox on days when a record is set.
import plotly.graph_objects as go


# NOTE(review): this definition reuses the name 'daily_climo', so running it
# replaces the Bokeh version defined earlier in the file (and this version
# takes no 'flood_thresholds' argument).
def daily_climo(data, var, scheme='mg'):
    """Create a daily climatology plot for environmental variable 'var'
    from 'data' using Plotly.

    Average high, daily average, and average low are drawn as fitted
    cosine curves; record highs and lows are drawn as markers; records set
    in the current calendar year are highlighted in white. The figure is
    displayed with fig.show() and nothing is returned.

    Inputs:
        data: xarray containing climatological stats
        var: str, one of the available environmental variables in 'data'
        scheme: str, either 'mg' or 'bm' specifying whether to use M. Grossi's
            color scheme or B. McNoldy's
    """
    # Dates for x axis (2020 is a leap year, so this produces 366 dates)
    xdates = pd.date_range(start='2020-01-01', end='2020-12-31', freq='1D')
    df = data.sel(variable=var)

    # Color dictionary (local to this function; it shadows any module-level
    # 'colors' while the function runs)
    colors = dict(
        mg=dict({
            'Record High Year': 'white',
            'Record High': '#d26c6c',
            'Average High': '#dc8d8d',
            'Daily Average': '#F5F5F5',
            'Average Low': '#a2bff4',
            'Record Low': '#74a0ef',
            'Record Low Year': 'white'}),
        bm=dict({
            'Record High Year': 'white',
            'Record High': 'orange',
            'Average High': 'red',
            'Daily Average': 'grey',
            'Average Low': 'purple',
            'Record Low': 'white'}
        ))

    # Create figure
    fig = go.Figure()

    # Records set this year and last year
    thisYear = pd.to_datetime('today').year
    year_columns = ['Record High Year', 'Record Low Year']
    thisYearRecords = (df == thisYear).to_dataframe().drop('variable', axis=1)[year_columns].sum().sum()
    lastYearRecords = (df == thisYear - 1).to_dataframe().drop('variable', axis=1)[year_columns].sum().sum()

    # Record values only on the days where the record was set this year
    # (NaN elsewhere). The index labels are suffixed with '-2020' and parsed
    # so that they land on the same 2020 axis as 'xdates'.
    highRecords = df['Record High'].where(df['Record High Year'] == thisYear).to_dataframe()['Record High']
    highRecords.index = pd.to_datetime(highRecords.index + '-2020')
    lowRecords = df['Record Low'].where(df['Record Low Year'] == thisYear).to_dataframe()['Record Low']
    lowRecords.index = pd.to_datetime(lowRecords.index + '-2020')

    # Dates are formatted as M/D/YYYY without zero padding. This is built
    # by hand because the '%-m' / '%-d' strftime codes are platform-specific.
    first = dt.strptime(df.attrs[f'{var} data range'][0], '%Y-%m-%d')
    last = dt.strptime(df.attrs[f'{var} data range'][1], '%Y-%m-%d')
    first_time = f'{first.month}/{first.day}/{first:%Y}'
    last_time = f'{last.month}/{last.day}/{last:%Y}'

    # High records this year (hover text only on days with a record)
    fig.add_trace(
        go.Scatter(
            x=highRecords.index, y=highRecords.values,
            name=f'{thisYear} Record'.upper(),
            mode='markers',
            marker=dict(size=6, color='white'),
            hovertext=[f'{thisYear} Record: {i}' if not pd.isnull(i) else '' for i in highRecords.values],
            hoverinfo='text'
        ))
    # Low records this year (hidden from the hover box and, below, from
    # the legend)
    fig.add_trace(
        go.Scatter(
            x=lowRecords.index, y=lowRecords.values,
            name='Low Record',
            mode='markers',
            marker=dict(size=6, color='white'),
            hoverinfo='none'
        ))
    # Record highs
    fig.add_trace(
        go.Scatter(
            x=xdates, y=df['Record High'],
            name='Record High'.upper(),
            mode='markers',
            marker=dict(size=3, color=colors[scheme]['Record High'])
        ))
    # Average highs
    fig.add_trace(
        go.Scatter(
            x=xdates, y=cos_fit(df['Average High']).round(1),
            name='Average High'.upper(),
            marker=dict(size=3, color=colors[scheme]['Average High'])
        ))
    # Daily average
    fig.add_trace(
        go.Scatter(
            x=xdates, y=cos_fit(df['Daily Average']).round(1),
            name='Daily Average'.upper(),
            marker=dict(size=3, color=colors[scheme]['Daily Average'])
        ))
    # Average lows
    fig.add_trace(
        go.Scatter(
            x=xdates,
            y=cos_fit(df['Average Low']).round(1),
            name='Average Low'.upper(),
            marker=dict(size=3, color=colors[scheme]['Average Low'])
        ))
    # Record lows
    fig.add_trace(
        go.Scatter(
            x=xdates, y=df['Record Low'],
            name='Record Low'.upper(),
            mode='markers',
            marker=dict(size=3, color=colors[scheme]['Record Low'])
        ))

    # Hover box
    fig.update_traces(
        hoverlabel=dict(bordercolor='white')
    )

    # Plot settings
    fig.update_layout(
        template='plotly_dark',
        paper_bgcolor='#404040',
        plot_bgcolor='#404040',
        height=600, width=1000,
        title=dict(text='Daily {} records'.format(var.lower()) +
                        '<br><sup>{}-{}</sup>'.format(first_time, last_time) +
                        '<br><sup>As of today, <b>{}</b> {} record highs/lows have been set. Last year, {} records were set.</sup>'.format(
                            thisYearRecords, var.lower(), lastYearRecords
                        ),
                   font=dict(size=20)),
        yaxis=dict(title=f'{var} ({data.attrs[f"{var} units"]})',
                   showgrid=True, gridcolor='grey'),
        xaxis=dict(showgrid=False, showspikes=True,
                   dtick='M1', tickformat='%b %d'),
        hovermode='x unified',
        legend=dict(itemsizing='constant'),
        hoverlabel=dict(font_size=12)
    )

    # The low-record markers share the legend entry of the high-record
    # markers, so hide their own entry
    for trace in fig['data']:
        if trace['name'] == 'Low Record':
            trace['showlegend'] = False
    fig.show()


daily_climo(days, 'Air Temperature', scheme='mg')

# That concludes this climatology demonstration series.